* Start from a clean session: drop data and programs, discard snapshots,
* and close any log left open (capture ignores the error when none is open)
clear all
program drop _all
snapshot erase _all
capture log close

* Do not pause output at -more- prompts
set more off

* Point the PLUS system directory at M:\Ado\Plus
sysdir set PLUS "M:\Ado\Plus"

********************************************************************************************
********************************************************************************************
********************************************************************************************
*** File name: 		Calculate Raw and Pareto-Adjusted Ginis
*** Last updated: 	2/3/2016
***
*** This file reads in the raw and pareto-adjusted p's and l's data, then
***		1. Calculates the raw and pareto-adjusted gini values
***		2. Saves the results:
***		   Output Data/Raw and Pareto-Adjusted Ginis.dta
********************************************************************************************
********************************************************************************************
********************************************************************************************

* Move to the folder named in the global macro directory; the input and output
* file paths used below are relative to it
cd "${directory}"

********************************************************************************************
********************************************************************************************
********************************************************************************************
*** 1. Calculates the raw and pareto-adjusted gini values
*** 	a. Read in the data
***		b. Assign original p's and l's as pareto-adjusted p's and l's for cases where the survey mean was greater than the national accounts mean
*** 	c. Loop through our raw and adjusted values and calculate the raw and adjusted Ginis
*** 	d. Mark cases we did and did not adjust
********************************************************************************************
********************************************************************************************
********************************************************************************************

*****************************************************************
*****************************************************************
*** a. Read in the data
*****************************************************************
*****************************************************************
use "Output Data/Raw and Pareto-Adjusted P's and L's.dta", clear

*****************************************************************
*****************************************************************
*** b. For cases where the survey mean was greater than the national accounts mean,
***    carry the original p's and l's over as the pareto-adjusted p's and l's
*****************************************************************
*****************************************************************

* Same overwrite for p and then l; a missing survey mean never qualifies
foreach v in p l {
	replace `v'_adj = `v' if !missing(survey_mean) & survey_mean > na_mean
}

*****************************************************************
*****************************************************************
*** c. Loop through our raw and adjusted p and l values and calculate the raw and adjusted Ginis
*****************************************************************
*****************************************************************

* Runs twice: once with an empty suffix for the raw p and l, once with _adj for the
* pareto-adjusted ones. Each pass creates one Gini variable (gini, then gini_adj)
* that is constant within countryname year surveytype, and leaves the number of
* observations unchanged.
foreach type in "" "_adj" {

	**************************************
	*** Sort by p, within each survey
	**************************************
	sort countryname year surveytype p`type'

	**************************************
	*** Add an observation with p = 0 and l = 0 at the start of each survey
	**************************************
	* Missing p sorts last, so num == 1 marks the smallest non-missing p of the survey.
	* Surveys with no non-missing p get no extra observation.
	by countryname year surveytype: gen num = _n if p`type' < .
	expand (2) if num == 1, gen(new)
	drop num
	* Sort on the same survey key as the sort above and the by-groups below.
	* The copy still carries the smallest p at this point, and -new puts it
	* ahead of the observation it was copied from.
	gsort countryname year surveytype p`type' -new
	replace p`type' = 0 if new == 1
	replace l`type' = 0 if new == 1

	**************************************
	*** Calculate the area under the Lorenz curve using trapezoids
	**************************************
	* The first observation of each survey has no predecessor, so its area is missing
	* and drops out of the total. With the missing option, a survey whose areas are
	* all missing gets a missing total instead of zero.
	by countryname year surveytype: gen area = .5 * (l`type' + l`type'[_n-1]) * (p`type'-p`type'[_n-1]) if p`type' < .
	by countryname year surveytype: egen total_area = total(area), missing

	**************************************
	*** Calculate gini
	**************************************
	* Area between the line of equality and the Lorenz curve, as a share of the
	* area under the line of equality (.5)
	gen gini`type' = (.5-total_area)/.5

	* Remove the added origin observations and the working variables
	drop if new == 1
	drop new area total_area

}

**************************************
*** Label the variables
**************************************
* Describe the two Gini variables created by the loop above
label var gini     "Gini Coefficient - Survey Data"
label var gini_adj "Gini Coefficient - Survey Data Adjusted for Missing Top Incomes"

*****************************************************************
*****************************************************************
*** d. Mark cases we did and did not adjust
*****************************************************************
*****************************************************************

**************************************
*** Mark cases we did and did not adjust
**************************************
* Codes of the flag and the conditions that assign them:
*   1  an adjusted Gini exists and differs from the raw Gini
*   2  no adjusted Gini and no national accounts (NA) mean
*   3  adjusted Gini equals the raw Gini and the survey mean exceeds the NA mean
*      (step b copied the raw p and l into the adjusted ones for these cases)
*   4  no adjusted Gini even though an NA mean is present and above the survey mean
* The text for code 3 now matches its condition, and code 4 has a label of its own.
label define updated ///
	1 "Adjustment Made" ///
	2 "No Adjustment - No NA Data" ///
	3 "No Adjustment - Survey Mean > NA Mean" ///
	4 "No Adjustment - No Adjusted Data, Survey Mean < NA Mean"
gen updated = 1 if gini_adj != gini & gini_adj < .
replace updated = 2 if gini_adj == . & na_mean == .
replace updated = 3 if gini_adj == gini & survey_mean > na_mean & survey_mean < .
replace updated = 4 if gini_adj == . & na_mean > survey_mean & na_mean < .

* Every observation must fall into one of the four cases; the do-file stops here otherwise
assert updated < .
label values updated updated
label variable updated "Marks If Adjustment Made for This Survey"

**************************************
*** Check results at survey level
**************************************
* Tabulate the flag on a temporary copy holding one row per distinct combination of
* countryname, year, surveytype and updated; restore brings the full data back
preserve
	contract countryname year surveytype updated
	tabulate updated, missing
restore

********************************************************************************************
********************************************************************************************
********************************************************************************************
*** 2. Save the data at survey level
********************************************************************************************
********************************************************************************************
********************************************************************************************

* Keep the identifiers, means, and Gini results, then collapse exact duplicate rows
keep countryname region year surveytype urbrur ///
	survey_mean na_mean pop alpha survey_pct ///
	gini gini_adj updated
duplicates drop

* Stop with an error unless countryname, year and surveytype uniquely identify each row
isid countryname year surveytype

save "Output Data/Raw and Pareto-Adjusted Ginis.dta", replace
